import argparse
import timeit
import warnings
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.datasets import load_iris, load_digits


def get_arguments(argv=None):
    """Parse command-line options for the MLP experiment.

    Args:
        argv: optional list of argument strings; defaults to sys.argv[1:]
            so the original no-argument call keeps working.

    Returns:
        argparse.Namespace with dataset / features / classes /
        test_size / random_state attributes.
    """
    parser = argparse.ArgumentParser(description='MLP')
    parser.add_argument('--dataset', type=int, default=2, choices=(1, 2),
                        help='the type of dataset: '
                             '1: the iris dataset, '
                             '2: the digits dataset')
    # BUG FIX: type=list split every argument string into single characters
    # (e.g. "f0" became ['f', '0']); nargs='+' collects whole tokens.
    parser.add_argument('--features', nargs='+', default=['f0', 'f1', 'f2', 'f3'],
                        help="the features of iris datasets for regression, "
                             "element of parameter should be 'f0', 'f1', 'f2' or 'f3'")
    # BUG FIX: same type=list problem, plus the elements must be ints to
    # match the integer labels compared in choose_dataset().
    parser.add_argument('--classes', nargs='+', type=int, default=[0, 1, 2],
                        help='the classes of iris datasets for classify, element of parameter should be 0, 1, 2')
    parser.add_argument('--test_size', type=float, default=0.33, help='the proportion of test data')
    parser.add_argument('--random_state', type=int, default=42, help='the random seed of dataset split')

    args = parser.parse_args(argv)
    return args


class MyPreprocessing:
    """Dataset loading, filtering, train/test splitting and one-hot encoding."""

    def __init__(self, parser):
        # `parser` is the parsed argparse.Namespace from get_arguments().
        self.dataset = parser.dataset
        self.random_state = parser.random_state
        self.test_size = parser.test_size
        self.classes = parser.classes
        self.features = parser.features

    def load_dataset(self):
        """Load the configured dataset (1: iris, 2: digits).

        Returns:
            (datas, target) arrays; the iris data is first filtered down to
            the configured features/classes via choose_dataset().

        Raises:
            ValueError: if self.dataset is neither 1 nor 2 (the original
            silently returned unbound locals in that case).
        """
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            if self.dataset == 1:
                dataset = load_iris()
                datas = dataset.data.astype(float)
                target = dataset.target.astype(int)
                df = pd.DataFrame({'f0': datas[:, 0],
                                   'f1': datas[:, 1],
                                   'f2': datas[:, 2],
                                   'f3': datas[:, 3],
                                   'label': target})
                datas, target = self.choose_dataset(df)
            elif self.dataset == 2:
                dataset = load_digits()
                datas = dataset.data
                target = dataset.target
                # Display one sample image as a quick sanity check.
                plt.imshow(datas[4].reshape(8, 8), cmap='gray')
                plt.title('label = ' + str(target[4]))
                plt.show()
            else:
                raise ValueError("dataset must be 1 (iris) or 2 (digits)")
            return datas, target

    def choose_dataset(self, df):
        """Keep only the configured classes and features of the iris frame."""
        df = df[df.label.isin(self.classes)]  # select the requested labels
        datas = df[self.features].values      # select the requested features
        labels = df[['label']].values         # label column as an (n, 1) array
        return datas, labels

    def split_dataset(self, datas, targets):
        """Shuffle features+labels together and split into train/test.

        Args:
            datas: (n, d) feature array.
            targets: (n,) or (n, 1) label array.

        Returns:
            X_train, y_train, X_test, y_test (labels come back 1-D).
        """
        assert 0 < self.test_size < 1, "Please choose right test size between 0 and 1"
        # BUG FIX: the original read (and resized in place!) the module-level
        # global `labels` instead of its `targets` parameter, so the method
        # only worked when called from __main__ with that exact global name.
        targets = np.asarray(targets).reshape(len(targets), 1)  # promote to a column
        test_num = int(self.test_size * len(targets))
        data_target = np.concatenate([datas, targets], axis=1)  # glue features + label
        np.random.seed(self.random_state)
        np.random.shuffle(data_target)
        X_test = data_target[:test_num, :-1]
        y_test = data_target[:test_num, -1]
        X_train = data_target[test_num:, :-1]
        y_train = data_target[test_num:, -1]
        return X_train, y_train, X_test, y_test

    def one_hot(self, labels):
        """One-hot encode integer labels; the width is max(labels) + 1."""
        labels = np.array(labels, dtype=int)
        num_classes = int(np.max(labels)) + 1
        one_hot = np.eye(num_classes)[labels]
        return one_hot


class BPNN(object):
    """A fully-connected feed-forward network trained with full-batch
    gradient descent under a (binary) cross-entropy loss.

    hidden_layer_sizes lists the width of every layer including input and
    output, e.g. (64, 32, 16, 10) for the 8x8 digits dataset.
    """

    def __init__(self, hidden_layer_sizes, activation, learning_rate):
        # Randomly initialise one weight matrix / bias vector per layer pair.
        self.layers = len(hidden_layer_sizes)
        self.weights = [np.random.randn(hidden_layer_sizes[i], hidden_layer_sizes[i+1])
                        for i in range(self.layers - 1)]
        self.biases = [np.random.randn(hidden_layer_sizes[i+1]) for i in range(self.layers - 1)]
        self.activation = activation
        self.learning_rate = learning_rate
        self.epochs = 0

    def _activate(self, z):
        """Apply the configured activation to the pre-activation z."""
        if self.activation == 'identity':
            return z
        elif self.activation == 'relu':
            return self.relu(z)
        elif self.activation == 'sigmoid':
            return self.sigmoid(z)
        raise ValueError("Please choose right activation function~")

    # Forward pass
    def feedforward(self, x):
        """Propagate a batch x through the network.

        Returns:
            (hidden_z, hidden_x): per-layer pre-activations and activations;
            index 0 holds the input itself.
        """
        hidden_z = [x]
        hidden_x = [x]
        for w, b in zip(self.weights, self.biases):
            z = np.dot(x, w) + b  # b broadcasts over the batch dimension
            hidden_z.append(z)
            # BUG FIX: the old relu zeroed z in place AFTER it was stored in
            # hidden_z, corrupting the pre-activations used by relu_diff.
            x = self._activate(z)
            hidden_x.append(x)
        # Return every layer's values for backpropagation.
        return hidden_z, hidden_x

    # Backward pass
    def backpropagation(self, y, hidden_z, hidden_x):
        """Compute mean weight/bias gradients for targets y."""
        nabla_w = [np.zeros_like(w) for w in self.weights]
        nabla_b = [np.zeros_like(b) for b in self.biases]
        # Gradient of the cross-entropy loss w.r.t. the network output.
        nabla_out = (1 - y) / (1 - hidden_x[-1]) - y / hidden_x[-1]
        # Walk backwards through the layers.
        for i in range(self.layers-1, 0, -1):
            if self.activation == 'identity':
                nabla_z = nabla_out
            elif self.activation == 'relu':
                nabla_z = self.relu_diff(nabla_out, hidden_z[i])
            elif self.activation == 'sigmoid':
                nabla_z = self.sigmoid_diff(nabla_out, hidden_x[i])
            else:
                raise ValueError
            nabla_out = nabla_z @ self.weights[i-1].T
            # NOTE(review): gradients are built from the batch means of the
            # activations and deltas separately (an approximation of the mean
            # of per-sample outer products) — preserved as in the original.
            nabla_w[i-1] = np.mean(hidden_x[i-1], axis=0).reshape(-1, 1) @ np.mean(nabla_z, axis=0).reshape(1, -1)
            nabla_b[i-1] = np.mean(nabla_z, axis=0)

        # Return every layer's weight and bias gradients.
        return nabla_w, nabla_b

    # Gradient-descent parameter update
    def update_weight(self, nabla_w, nabla_b):
        for i in range(self.layers-1):
            self.weights[i] = self.weights[i] - self.learning_rate * nabla_w[i]
            self.biases[i] = self.biases[i] - self.learning_rate * nabla_b[i]

    # Cost function
    def loss(self, y, y_pre):
        """Mean (over samples) binary cross-entropy, summed over outputs."""
        # Vectorized form of the original per-sample loop; identical value.
        entropy = np.sum(y * np.log(y_pre) + (1 - y) * np.log(1 - y_pre), axis=1)
        return -1 * np.mean(entropy)

    def relu(self, x):
        # BUG FIX: return a fresh array instead of zeroing x in place.
        return np.maximum(x, 0)

    def relu_diff(self, dx, x):  # ReLU derivative: pass dx where pre-activation x > 0
        return np.where(x > 0, dx, 0)

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def sigmoid_diff(self, dx, x):  # sigmoid derivative, x is the activation value
        return dx * x * (1 - x)

    # Training loop
    def fit(self, X_train, y_train, num_epoches):
        """Train for num_epoches full-batch iterations; returns the loss history."""
        self.epochs = num_epoches
        losses = []
        for epoch_id in range(num_epoches):
            # Classic four-step cycle: forward -> loss -> gradients -> update.
            hidden_z, hidden_x = self.feedforward(X_train)
            nabla_w, nabla_b = self.backpropagation(y_train, hidden_z, hidden_x)
            self.update_weight(nabla_w, nabla_b)
            # Record each iteration's loss for the learning curve.
            loss = self.loss(y_train, hidden_x[-1])
            losses.append(loss)
            if (epoch_id + 1) % 100 == 0:
                print("The {} epoch, loss = {}".format(epoch_id+1, loss))
        return losses

    def predict(self, X_test, y_test):
        """Forward X_test through the trained network and return its loss."""
        # Reuse feedforward instead of duplicating the layer loop.
        _, hidden_x = self.feedforward(X_test)
        return self.loss(y_test, hidden_x[-1])

    def draw(self, loss):
        """Plot the training-loss curve."""
        plt.plot(loss)
        plt.xlabel('epoch')
        plt.ylabel('loss')
        plt.show()


if __name__ == '__main__':
    args = get_arguments()

    # Load and split the dataset.
    # BUG FIX: the original did `MyPreprocessing = MyPreprocessing(parser)`,
    # rebinding the class name to an instance; use a distinct variable.
    preprocessing = MyPreprocessing(args)
    datas, labels = preprocessing.load_dataset()
    X_train, y_train, X_test, y_test = preprocessing.split_dataset(datas, labels)
    y_train = preprocessing.one_hot(y_train)
    y_test = preprocessing.one_hot(y_test)

    # 64 inputs / 10 outputs match the 8x8 digits dataset (default --dataset 2).
    mlp = BPNN(hidden_layer_sizes=(64, 32, 16, 10), activation='sigmoid', learning_rate=0.0001)
    losses = mlp.fit(X_train, y_train, 20000)
    mlp.draw(losses)

    pre_loss = mlp.predict(X_test, y_test)
    print("The predict loss is {}".format(pre_loss))






